********************************************************
* CREATING THE LAND VARIABLES AND THE FINAL DATASET    *
*                --------------                        *
* This do-file uses the raw area measures from the     *
* polygons to create three land measures. In addition, * 
* it collects or creates others plants characteristics * 
* as well as others useful variables to assess the     * 
* quality of the the final dataset.                    *
********************************************************


clear
set more off


// Working directories
cd "$workpath"


// Locals we need for looping across years and IO directories
local years "2001 2003 2005 2007 2009 2011 2013 2017"


use "$datapath/private/address_id.dta", clear
tostring scottsid, replace
sort scottsid year
merge 1:1 scottsid year using "$outpath/scotts_global.dta", force
drop _merge


// Merging with the new geocoded set of addresses to related each plant to its correct geographic coordinates
// The scotts_address.dta is obtained by processing the various options of geocoding to assign a unique set of geocgraphic coordinates to each plants
// (see "traitement_geocodage.do")
sort adressid

merge m:1 adressid using "$outpath/address_geo2001_2017.dta", force keepusing(scotts_address lat_s lon_s dmti_address acc_d lon_d lat_d google_address1 acc_g1 lon_g1 lat_g1 type_1 google_address2 acc_g2 lon_g2 lat_g2 type_2 acc_g2adj lon lat geocodedby quality accuracy)

drop _merge

// Correct some errors in variables and generate provID and region variables
gen provID=.
replace provID = 10 if province == "NL"
replace provID = 11 if province == "PE"
replace provID = 12 if province == "NS"
replace provID = 13 if province == "NB"
replace provID = 24 if province == "QC"
replace provID = 35 if province == "ON"
replace provID = 46 if province == "MB"
replace provID = 47 if province == "SK"
replace provID = 48 if province == "AB"
replace provID = 59 if province == "BC"
replace provID = 60 if province == "YT"
replace provID = 61 if province == "NT"
replace provID = 62 if province == "NU"


// Add the dissemination area population and density
// To have the scotts_address_da.dta dataset we use the set of new geocoded addresses that we process to obtain the scotts_adress.dta file and we perform a spatial join with the dissemination area shape file using ARCGIS software

merge m:1 adressid using "$datapath/private/address_da.dta", force

ren ERUID eruid
ren DAUID dauid
ren pop2016 da_pop

capture drop _merge

destring dauid, replace



// Add the dissemination area population and density
// To have the scotts_address_da.dta dataset we use the set of new geocoded addresses that we process to obtain the scotts_adress.dta file and we perform a spatial join with the dissemination area shape file using ARCGIS software
merge m:1 dauid using "$datapath/census_da_var_ring.dta"

capture drop _merge

sort scottsid year
destring *pop* *area*, replace
gen da_density0 = da_pop / da_area
capture replace da_density = pop2016 / area
gen da_density05 = pop500 / area500
gen da_density15 = pop1500 / area1500
gen ph500 = vtoth500 / nbh500
gen ph1500 = vtoth1500 / nbh1500

drop if scottsid == ""
drop if employment <= 0


save "$temppath/scotts_all.dta", replace 


//Merging with data from parcels, buildings, abd heights into a single file

use "$temppath/scotts_all.dta", clear

merge m:1 adressid using "$outpath/scotts_heights.dta", keepusing(assignedbyh disth height idparcelh prov_stateh polygonh state_idh FB_area_d FB_area_c)
drop _merge

merge m:1 adressid using "$outpath/scotts_buildings.dta", keepusing(assignedbyb distb idparcelb polygonb prov_stateb state_idb B_area_c B_area_d)
drop _merge

merge m:1 adressid using "$outpath/scotts_parcels.dta", keepusing(assignedbyp distp idparcelp polygonp prov_statep state_idp P_area_c P_area_d number_NEQ)
drop _merge



// Merge with additional dataset

// A: Merging the dataset of raw area measures with the datasets of correspondance building-parcel (Each parcel has as corresponding building values the sum of all the building located on it. This is meant to account for parcels that host
// more than one building)
// Note: The buildings_on_parcel.dta files comes from a spatial join process process and converted into .dta file
merge m:1 state_idp idparcelp using "$datapath/buildings_on_parcel.dta"
drop if _merge==2
drop _merge

gen test = substr(state_idb, 1, 2)
replace state_idb = "ontario" if test == "on"
replace state_idb = "alberta" if test == "al"
replace state_idb = "britishcolumbia" if test == "br"
replace state_idb = "newbrunswick" if test == "ne"
replace state_idb = "quebec" if test == "qu"
replace state_idb = "saskatchewan" if test == "sa"
replace state_idb = "manitoba" if test == "ma"

// B: Merging the dataset of raw area measures with the datasets of correspondance parcel-building (Each building has as corresponding parcel values the sum of all the parcels that it overlaps. This is meant to account for cases where a building is built on more than parcels)
// Note: The parcels_on_building.dta files comes from a spatial join process process and converted into .dta file
destring idparcelb, replace
merge m:1 state_idb idparcelb using "$datapath/parcels_on_building.dta"
drop if _merge==2
drop _merge

compress

// Save temp file
save "$temppath/scotts_adjusted.dta",replace 



use "$temppath/scotts_adjusted.dta", clear

// C: Merge with the dataset for assignation checks, where an address is drawn from SKcotts and the identifier of its related polygon from our procudre is compared with is actual identifier from "infolot-Quebec" 
destring adressid, replace
merge m:1 adressid using "$datapath/infolot_qc_data.dta"
drop _merge


gen idparcelp0 = idparcelp							  // Generate a comaprison variable idparcel
replace idparcelp0 = subinstr(idparcelp0, " ", "", .) // Format the values to avoid mismatch due to format
replace infolot = subinstr(infolot, " ", "", .)
gen matchp = (infolot == idparcelp0) if infolot != "" // Compare identifier from our procedure to infolot-Quebec id


// D: Merge with the town centers file: The file comes from a procedure of centers identification starting with a C++ routine and some aggregation using ArcGis
//merge m:1 adressid using "$datapath/private/scotts_XY75501.dta"
//drop _merge


// E: Merge with the land-use regulation file : The file comes from spatial join process where each plant is related to is zoning category
destring scottsid, replace
merge m:1 year scottsid using "$datapath/private/scotts_dmti_lur.dta", keepusing(category)
drop _merge


// F: Prepare the estimation of the number of floors, by merging with a file containing heights and numbers of floors for 421 buildings in Canada 
drop if missing(scottsid)
drop if missing(year)

// Merge with the previous file
// Note: This is a file of 421 building heights and their corresponding number of floors from a web site of building height in canada. We have assigned arbitrary "year" and "scottsid" in order to join with the scotts file.
destring scottsid, replace
merge 1:1 year scottsid using "$datapath/floors_height.dta", keepusing(province0 use0 height0 nbfloor0)
drop _merge

// Fit a model and perform predictions of the number of floors for buildings that have height information
gen height1 = height0							       // Save heights values before modifying the variable
replace height0 = 3.28084 * height0    			       // Converting height measures from meter to feet
reg nbfloor0 height0							       // Fit the model
replace height0 = height						       // Consider the height values from the dataset for the prédiction

// Fit
local coeff = _b[height]
gen pr_nbfloor = `coeff' * height
gen nbfloor = round(pr_nbfloor)					       //Round values to integer
replace nbfloor = 1 if nbfloor < 1 & !missing(nbfloor) //Assign the value 1 to values <1

capture drop nbfloor0 height0 use0 province0 height1 pr_nbfloor   //Dropping unnecessary variables (use1 prov00)
compress


// G: Adjuste the raw area measures 
// Note: XYZ means value of Z from the spatial join process where Y collects all the X to which it is related,
// e.g : PBP = value of the sum of parcels located on the corresponding building
replace P_area_c = float(P_area_c)    		// Avoid difference due to rounding values to different number of decimals
replace B_area_c = float(B_area_c) 			// Avoid difference due to rounding values to different number of decimals
replace BPP_area_c = float(BPP_area_c)		// Avoid difference due to rounding values to different number of decimals
replace PBP_area_c = float(PBP_area_c)		// Avoid difference due to rounding values to different number of decimals        
replace PBB_area_c = float(PBB_area_c)      // Avoid difference due to rounding values to different number of decimals
replace BPB_area_c = float(BPB_area_c)      // Avoid difference due to rounding values to different number of decimals


// adjusting parcel, building and height size
gen P_area_cold = P_area_c					// Save the old values of parcels
// Note: When a plant is associated to a parcel and a building, but the building overlaps more than one parcel, 
// we replace the value of the parcel by the sum of all the parcels that the building overlaps
replace P_area_c = PBP_area_c if PBP_area_c > P_area_c & !missing(PBP_area_c) & !missing(P_area_c)  


// MODIFIED, Feb 6
//replace P_area_c = BPP_area_c if P_area_c < B_area_c & BPP_area_c > B_area_c & !missing(PBP_area_c) & !missing(P_area_c) 
replace P_area_c = BPP_area_c if P_area_c < B_area_c & BPP_area_c > B_area_c & !missing(BPP_area_c) & !missing(P_area_c)
// MODIFIED, Feb 6

// When a parcel value is lower than the building value, then replace that original parcel's value by the value of the parcel to which the building is related from the building-on-parcel spatial join

gen B_area_cold = B_area_c
replace B_area_c = BPB_area_c if BPB_area_c > B_area_c & BPB_area_c < P_area_c & !missing(BPB_area_c) & !missing(B_area_c)  // Take the total value of multiple buildings on the same polygon

// MODIFIED, Feb 6
//replace B_area_c = PBB_area_c if P_area_c < B_area_c  & PBB_area_c < P_area_c & !missing(BPB_area_c) & !missing(B_area_c)
replace B_area_c = PBB_area_c if P_area_c < B_area_c  & PBB_area_c < P_area_c & !missing(PBB_area_c) & !missing(B_area_c)	 
// MODIFIED, Feb 6

// Take value of building from the parcel to building process if parcel-on-building spatial join to have the coherence  parcel>building

gen FB_area_cold = FB_area_c
replace FB_area_c = B_area_c if !missing(FB_area_c) & !missing(B_area_c) // Take as building footprint the values of the building polygon when available

// Generate floorspace measure
gen F_area_c = nbfloor * FB_area_c						// Generate the floorspace variable
gen F_area_d = nbfloor * FB_area_d
// Not all the polygons of the height dataset do have a value for the height variable. As a result, the missings are different for FB_area_c and F_area_c


capture drop polyshare polylabor *_land_* _merge spacemate


// Generate variables to use for the apportionment of shared polygons : We will assuming same share of land for all firms that share the same polygon
foreach i in p b h {

	bysort year state_id`i' idparcel`i'  : egen spacemate`i' = count(scottsid) if !missing(state_id`i') & !missing(idparcel`i') & !missing(scottsid) & !missing(year) //Generate number of plants on each polygon
	
	bysort year state_id`i' idparcel`i'  : egen spacemate`i'M = count(scottsid) if !missing(state_id`i') & !missing(idparcel`i') & !missing(scottsid) & !missing(year) & naics2d == 30 //Generate number of plants on each polygon containing a manufacturing plant

}


gen manuf = (naics2d == 30)  // Generate a manufacturing dummy indicating if the plant operates in the manufacturing industry

encode exportindicator, gen(exports)
encode headofficestatus, gen(hoffice)

replace acc_g1 = "Not found by the geocoder1" if acc_g1 == ""
replace acc_g2 = "Not found by the geocoder2" if acc_g2 == ""
replace acc_d = "Not found by DMTI" if acc_d == ""
//gen Acc = "Level of accuracy"

//egen Centers75501 = group(CX75501 CY75501)	// Generate an identifier for each center
//replace d75501 = d75501 / 1000   			// From meters to Kilometers

//gen far = land_hc/land_pc  					
//gen bpr=land_bc/land_pc   				

gen rural = da_density < 400 & !missing(da_density) //Generating the population density variable

// Groupe zoning categories into three rather than 7
encode category, gen(category0)
recode category0 (2=1) (4/5=3) (8=3) (7=1) (6=2), gen(cat0)

// Harmonize province variables
replace provID = 10 if province == "NL"  
replace provID = 11 if province == "PE"
replace provID = 12 if province == "NS"
replace provID = 13 if province == "NB"
replace provID = 24 if province == "QC"
replace provID = 35 if province == "ON"
replace provID = 46 if province == "MB"
replace provID = 47 if province == "SK"
replace provID = 48 if province == "AB"
replace provID = 59 if province == "BC"
replace provID = 60 if province == "YT"
replace provID = 61 if province == "NT"
replace provID = 62 if province == "NU"
encode province, gen(prov0)


// H: Generate the quality variable based on the relation between the match variable and the assigned categories
// Rename and replacing the quality variable of the geocoding to avoid confusion with the overall quality of the land measure that we will generate below

ren quality geoquality
replace geoquality = 4 if geoquality == .      // Create a category for addresses not found by the geocoders

capture encode assignedbyp, gen(assignedbyp0)  // Based on cross tabulation of assignedby and match

//gen qualityp = .
//replace qualityp = 1 if (assignedbyp0 == 5 | assignedbyp0 == 3) // With centroid-border + within
//replace qualityp = 2 if (assignedbyp0 == 6 | assignedbyp0 == 7) & !missing(assignedbyp0) // With within-border + within-center
//replace qualityp = 3 if assignedbyp0 <= 2 | assignedbyp0 == 4 // Border + centroid + centroid-border-within
//replace qualityp = . if geoquality > 1

gen qualityp = .
replace qualityp = 1 if (assignedbyp0 == 5 | assignedbyp0 == 4) // With centroid-border-within-border + within
replace qualityp = 2 if (assignedbyp0 == 6 | assignedbyp0 == 7) & !missing(assignedbyp0) // With within-border + within-center
replace qualityp = 3 if assignedbyp0 <= 2 | assignedbyp0 == 3 // Border + centroid + centroid-border
replace qualityp = . if geoquality > 1


// For buildings
capture encode assignedbyb, gen(assignedbyb0)  // based on cross tabulation of assignedby and match

//gen qualityb=.
//replace qualityb = 1 if assignedbyb0 == 2 | assignedbyb0 == 3 | assignedbyb0 == 6 | assignedbyb0 == 7 // centroid + //centroid-border + within-border + within-centroid
//replace qualityb = 2 if assignedbyb0 == 5 & !missing(assignedbyb0) // within
//replace qualityb = 3 if assignedbyb0 == 1 | assignedbyb0 == 4  // border + centroid-border-within
//replace qualityb = . if geoquality>1

gen qualityb=.
replace qualityb = 1 if assignedbyb0 == 4 | assignedbyb0 == 3 | assignedbyb0 == 6 | assignedbyb0 == 7 // centroid-border + within-border + within-centroid + centroid-border-within
replace qualityb = 2 if assignedbyb0 == 5 & !missing(assignedbyb0) // within
replace qualityb = 3 if assignedbyb0 == 1 | assignedbyb0 == 2  // border + centroid
replace qualityb = . if geoquality>1


// Generate the origin_geo based on equality between lon/lat and the lon_* / lat_*
replace geocodedby = "google1" if lon == lon_g1 & lat == lat_g1
replace geocodedby = "google2" if lon == lon_g2 & lat == lat_g2 & geocodedby == ""
replace geocodedby = "dmti" if lon == lon_d & lat == lat_d & geocodedby == ""


// Generating/encoding variables
encode eruid, gen(eruid0)


// Final cleaning the dataset
drop if missing(scottsid)
drop if missing(province)

compress

save "$outpath/scotts_final.dta", replace

keep qualityp qualityb year manuf naics3d province
save "$outpath/database_for_desc.dta", replace // data we need for the descriptives in Tables B1-B3



// I: We merge the establishment Scotts data with relevant additional information

//We create population at the CMA level

use "$datapath/census_cma.dta", clear

keep cmapuid pop year
tostring cmapuid, replace
gen cmauid=substr(cmapuid,3,5)

destring cmapuid cmauid , replace

duplicates tag cmapuid year, gen(tag1)
tab tag1
//One observation per cma-province-year

duplicates tag cmauid year, gen(tag2)
tab tag2
//Some CMAs with multiple observations

tab cmauid if tag2==1
//
//     cmauid |      Freq.     Percent        Cum.
// ------------+-----------------------------------
//        330 |          8       20.00       20.00
//        502 |          8       20.00       40.00
//        505 |          8       20.00       60.00
//        515 |          8       20.00       80.00
//        840 |          8       20.00      100.00
// ------------+-----------------------------------
//      Total |         40      100.00
//
//These are CMAs and CAs that cross provincial boundaries (Campbellton 330, Hawkesbury 502, Ottawa-Gatineau 505, Pembroke 515, Lloydminster 840)


collapse (sum) pop, by(cmauid year)

rename pop pop_cma

reshape wide pop_cma, i(cmauid) j(year)

save "$outpath/census_pop_cma.dta", replace



use "$outpath/scotts_final.dta", clear

//We only keep manuf establishments
keep if manuf == 1
tab year
//
//        Year |      Freq.     Percent        Cum.
//------------+-----------------------------------
//       2001 |     54,379       13.81       13.81
//       2003 |     52,746       13.40       27.21
//       2005 |     50,364       12.79       40.00
//       2007 |     48,015       12.19       52.19
//       2009 |     46,346       11.77       63.96
//       2011 |     44,270       11.24       75.21
//       2013 |     37,747        9.59       84.79
//      2017 |     32,829        8.34       93.13
//       2019 |     27,045        6.87      100.00
//------------+-----------------------------------
//      Total |    393,741      100.00
//


//We generate employment in 2013 that will be used for accounting for past employment growth
keep if year == 2013 | year == 2017

egen t = group(year)
tsset scottsid t
gen emp2013 = l.emp

//We only keep 2017 for the subsequent analysis
keep if year == 2017 

// We merge with distance to major transport infrastructure data
// Note: This file is a GIS output
merge 1:1 scottsid using "$datapath/private/scotts_distances.dta" 
drop if _merge==2
drop _merge

// We merge with CMA population (from the Canadian Census as obtained from CHASS)
destring cmauid, replace
merge m:1 cmauid using "$datapath/census_pop_cma.dta"
keep if _m==3
drop _merge


//We merge with nb of functions performed in the establishment
merge 1:1 scottsid using "$datapath/private/num_business_lines.dta"
drop if _m==2
drop _merge

//We merge with nb of naic4 in which the establishment is active
merge 1:1 scottsid using "$datapath/private/num_naics4.dta"
drop if _m==2
drop _merge

//We merge with the nb products produced in the establishment
merge 1:1 scottsid using "$datapath/private/num_products.dta"
drop if _m==2
drop _merge

// We merge with distance to city-centres 
// Note: These are computed running the C++ code explained in the subfolder
// /clustering and computing the the distances and weighted distances using
// GIS software
preserve
use "$datapath/private/scotts_city_centres.dta", clear
keep scottsid cmauid_NEW dist_weighted_OLD dist_weighted_NEW dist_min_NEW

save "$temppath/temp.dta", replace

restore

// REMOVED
//merge m:1 scottsid year using "$datapath/private/scotts_distance_weighted.dta"
//drop if _m==2
//drop _m
// REMOVED
merge m:1 scottsid using "$temppath/temp.dta"
drop if _m==2
drop _merge
erase "$temppath/temp.dta"


//pwcorr dist_w dist_weighted_OLD dist_weighted_NEW dist_min_NEW d75501, star(0.01)
 
//
// 
//             |   dist_w dist_w~D dist_w~W   d75501 dist_m~W
// -------------+---------------------------------------------
//      dist_w |   1.0000 
//dist_weigh~D |   0.6592*  1.0000 
//dist_weigh~W |   0.5743*  0.4216*  1.0000 
//      d75501 |   0.6607*  0.9998*  0.4221*  1.0000 
//dist_min_NEW |   0.7478*  0.6228*  0.6242*  0.6229*  1.0000 
//

//We merge with Montreal role information
preserve
use "$datapath/private/land_mtl_data.dta", clear
bys scottsid: egen var=max(year)
keep if year==var
keep P_area_d etage_hors F_area_d scottsid
destring scottsid, replace

rename P_area_d P_area_role
rename F_area_d F_area_role
rename etage_hors etage_role

compress 

save "$temppath/temp.dta", replace
restore

merge m:1 scottsid using "$temppath/temp.dta"
drop if _m==2
gen mtl_in= _m==3
drop _merge

compress

save "$outpath/database_for_reg.dta", replace



*We create the variables we will need for the regressions
//Log of parcel size per worker, building footprint per worker, floorspace per worker and employment
gen Lnland_pwc = -log(P_area_c / employment)
gen Lnland_bwc = -log(B_area_c / employment)
gen Lnland_fwc = -log(F_area_c / employment)
gen Lnland_pwc_role = -log(P_area_role / employment)
gen Lnland_fwc_role = -log(F_area_role / employment)
gen Lnemp = log(employment)

gen ratio = Lnland_pwc - Lnland_bwc
gen ratio_bis = Lnland_pwc_role - Lnland_bwc
//gen ratio = Lnland_bwc - Lnland_pwc 
//gen ratio_bis = Lnland_bwc - Lnland_pwc_role 

//Number of neighbors on parcels and in buildings and their polynomials
foreach v of var spacematep spacemateb number_NEQ {
	replace `v'=`v'-1
	gen `v'2=`v'^2
	gen `v'3=`v'^3
	gen `v'4=`v'^4
}

//Log of several control variables //dist_weighted_OLD
foreach var in  popcma areacma dist_weighted_NEW dist_min_NEW dist_maj_air dist_maj_sea dist_min_air dist_stat_freight dist_junction da_density05 da_density15 {
	gen Ln`var'=log(`var')
} 


label var Lnland_pwc "Ln Employment over parcel size"
label var Lnland_bwc "Ln Employment over building footprint"
label var Lnland_fwc "Ln Employment over floorspace"
label var Lnland_pwc_role "Ln Employment over parcel size - Montreal Roll"
label var Lnland_fwc_role "Ln Employment over floorspace - Montreal Roll"
label var Lnpopcma "Ln CMA's population"
label var Lnareacma "Ln CMA surface area"
label var dist_min_NEW "Distance to the closest city centre"
label var dist_weighted_NEW "Weighted Distance to city centre(s)"
label var Lndist_maj_air "Ln Distance to major airport"
label var Lndist_maj_sea "Ln Distance to major seaport"
label var Lndist_min_air "Ln Distance to minor airport"
label var Lndist_stat_freight "Ln Distance to freight station"
label var Lndist_junction "Ln Distance to junction"
label var Lnemp "Ln Employment"
label var Lnda_density05 "Ln Population density 500m"
label var spacematep "\# neighbors"
label var spacematep2 "\# neighbors$^{2}$"
label var spacematep3 "\# neighbors$^{3}$"
label var spacematep4 "\# neighbors$^{4}$"
label var spacemateb "\# neighbors"
label var spacemateb2 "\# neighbors$^{2}$"
label var spacemateb3 "\# neighbors$^{3}$"
label var spacemateb4 "\# neighbors$^{4}$"
label var NBdiffproduct "\# products"
label var NBdiffnaics4 "\# 4-digit NAICS"
label var NBdiffbusiness "\# functions in the estab."
/*label var number_NEQ "\# neighbors (QC)"
label var number_NEQ2 "\# neighbors$^{2}$ (QC)"
label var number_NEQ3 "\# neighbors$^{3}$ (QC)"
label var number_NEQ4 "\# neighbors$^{4}$ (QC)"*/

label var Lnpopcma "Ln CMA's population"
label var Lnareacma "Ln CMA surface area"
label var dist_min_NEW "Distance to the closest city centre"
label var dist_weighted_NEW "Weighted Distance to city centre(s)"
label var Lndist_maj_air "Ln Distance to major airport"
label var Lndist_maj_sea "Ln Distance to major seaport"
label var Lndist_min_air "Ln Distance to minor airport"
label var Lndist_stat_freight "Ln Distance to freight station"
label var Lndist_junction "Ln Distance to junction"
label var Lnemp "Ln Employment"
label var Lnda_density05 "Ln Population density 500m"
label var Lnda_density15 "Ln Population density 1500m"
label var Lnpopcma "Ln Population CMA"
label var spacematep "\# neighbors"
label var spacematep2 "\# neighbors$^{2}$"
label var spacematep3 "\# neighbors$^{3}$"
label var spacematep4 "\# neighbors$^{4}$"
label var spacemateb "\# neighbors"
label var spacemateb2 "\# neighbors$^{2}$"
label var spacemateb3 "\# neighbors$^{3}$"
label var spacemateb4 "\# neighbors$^{4}$"
label var NBdiffproduct "\# products"
label var NBdiffnaics4 "\# 4-digit NAICS"
label var NBdiffbusiness "\# functions in the estab."
//label var number_NEQ "\# neighbors (QC)"
//label var number_NEQ2 "\# neighbors$^{2}$ (QC)"
//label var number_NEQ3 "\# neighbors$^{3}$ (QC)"
//label var number_NEQ4 "\# neighbors$^{4}$ (QC)"


*We create the sample dummies
gen samplep = qualityp == 1 & Lnemp != . & Lnda_density05 != .& exports != .
gen sampleb = qualityb == 1 & Lnemp != . & Lnda_density05 != . & exports != .

compress

save "$outpath/database_for_reg.dta", replace

